# Launch a vLLM OpenAI-compatible server for a local Qwen2.5-14B-Instruct
# checkpoint, sharded across two GPUs with tool calling enabled.
#
# Written in plain POSIX sh syntax; no interpreter line is declared, so run it
# through a shell explicitly.

# --- GPU / Hugging Face environment ------------------------------------------
# Enumerate CUDA devices by PCI bus ID so the indices below are stable
# (presumably chosen to line up with nvidia-smi numbering).
CUDA_DEVICE_ORDER=PCI_BUS_ID
# Expose only GPUs 0 and 1 to the server process.
CUDA_VISIBLE_DEVICES=0,1
# Keep the Hugging Face hub client offline; the model is loaded from local disk.
HF_HUB_OFFLINE=1
export CUDA_DEVICE_ORDER CUDA_VISIBLE_DEVICES HF_HUB_OFFLINE

# --- Server settings ---------------------------------------------------------
model_dir=/file/ljw22/Qwen2.5-14B-Instruct  # local checkpoint directory
served_name=qwen2.5-14b-instruct            # model name passed to --served-model-name
tool_parser=hermes                          # parser used for tool-call output
max_model_len=32768                         # maximum context length, in tokens
tp_size=2                                   # matches the two GPUs made visible above
listen_port=8989                            # TCP port the server listens on

# Start the server (runs in the foreground).
vllm serve "$model_dir" \
  --served-model-name "$served_name" \
  --enable-auto-tool-choice \
  --tool-call-parser "$tool_parser" \
  --max-model-len="$max_model_len" \
  --tensor-parallel-size "$tp_size" \
  --port "$listen_port"
